** Clean CPI extracts  
** JHL 

*************************************
** Set up workspace
*************************************
* Pin command behaviour to Stata 14 and start from an empty session.
version 14.0
clear all
set more off

* Start timer 1 so that the "timer off 1" / "timer list 1" pair at the end of
* this file has an interval to report. It has to come after -clear all-,
* which (per the -clear- documentation) also resets all timers.
timer on 1

* Work from the project root and make the ado-files in ../programs visible.
* The global path_home is not set in this file; it must already be defined
* when the script is run.
cd "${path_home}"
adopath + ../programs

** log using "${path_log}/c06_cpi", text replace

*************************************
** Start work here
*************************************

* Read the raw CPI-U city extract; the first row supplies the variable names.
import delimited "${path_big_dta}/bls/cpiu_cities.txt", varnames(1) clear

*************************************
** [1] Clean and reshape CPI city indices
*************************************

* Row counter, used as the i() key for the reshape to long form below.
generate id = _n

** Change as needed
* (an alternative, shorter window would be jan2004-half22016)
* Give every period column the common stub "v" so reshape can stack them.
foreach period of varlist jan1914-half22016 {
	rename `period' v`period'
}
* Best-effort numeric conversion of the period columns; -capture- hides any
* error, and columns holding non-numeric text are left as strings.
* NOTE(review): the footnote clean-up later in this file treats value as a
* string, so at least some of these columns are presumably still strings
* after this step - confirm against the raw file.
capture destring vjan1914-half22016, replace

* Stack the period columns: one row per original row and period, with the
* period label (the column name without its "v" prefix) stored in my2.
reshape long v@, i(id) j(my2) string
rename v value

* Parse the period label as a month-year date. Labels that date() cannot
* read with the "MY" mask yield missing, and so do month and year below.
generate my = date(my2, "MY")
format my %td
generate month = month(my)
generate year = year(my)

* Monthly date built from the calendar year and month.
generate ym = ym(year, month)
format ym %tm

* Split the series identifier: characters 5-8 are the area code and
* everything from character 9 onwards is the item code.
generate area_code = substr(seriesid, 5, 4)
generate item_code = substr(seriesid, 9, .)

* Attach area names and item names from the lookup files. Only rows that
* match are kept, so series with an unknown area or item code are dropped.
merge m:1 area_code using "${path_big_dta}/bls/cu_area", keepusing(area_name area_code) keep(match) nogenerate
merge m:1 item_code using "${path_big_dta}/bls/cu_item", keepusing(item_name item_code) keep(match) nogenerate

sort seriesid year month

** Remove the "(R)" revised-value footnote marker, turn single-blank cells
** into Stata's missing-value symbol, then convert value to numeric.
replace value = subinstr(value, "(R)", "", .)
replace value = "." if value == " "
destring value, replace

* Store the long-format file (one row per series and period).
save "${path_big_dta}/bls/reshape_cpiu_cities", replace

*************************************
** [2] Exploring the data with simple plots 
*************************************

* Reload the long-format file written by section [1].
use "${path_big_dta}/bls/reshape_cpiu_cities", clear

* Disabled example plot: housing vs. rent index for area A101.
** tw (line value ym if area_code=="A101"&item_name=="Housing") (line value ym if area_code=="A101"&item_name=="Rent of primary residence")

* Monthly areas: tabulate areas over rows with a parsed month and a value.
tabulate area_name if month != . & value != .

** Only 9 cities that do not overlap states

* Break "City, ST" area names at the comma: the first piece is the city and
* the second piece is used as the state abbreviation key (stusab).
split area_name, parse(", ")
rename area_name1 city
rename area_name2 stusab

* Bring in the state variable from the state lookup; areas whose second
* piece is not found in the lookup are kept, with state left missing.
merge m:1 stusab using "${path_big_dta}/us_states/state", keepusing(state stusab) keep(match master) nogenerate

** Reshape wide for items, unit of observation: area-period
* Replace the row counter with an identifier for each area-period pair.
capture drop id
egen id = group(area_code my2)
* seriesid and item_name differ across items within an id, so they have to
* be removed before the items can be spread into columns.
drop seriesid item_name
sort id
reshape wide value, i(id) j(item_code) string

sort area_code year month

** Label variables
* Item codes and their descriptions:
*   SAF    Food and Beverages         SAF1   Food
*   SAF11  Food at home               SAH    Housing
*   SAN    Nondurables                SANL1  Nondurables less food
*   SANL11 Nondurables less food and bev
*   SEFV   Food away from home        SEHA   Rent of primary residence
label variable valueSAF "Food and Beverages"
label variable valueSAF1 "Food"
label variable valueSAF11 "Food at home"
label variable valueSAH "Housing"
label variable valueSAN "Nondurables"
label variable valueSANL1 "Nondurables less food"
label variable valueSANL11 "Nondurables less food and bev"
label variable valueSEFV "Food away from home"
label variable valueSEHA "Rent of primary residence"

* Keep a single row per area, year and month; -force- is needed because the
* discarded rows may differ in the remaining variables.
* NOTE(review): rows whose period label could not be parsed as a month-year
* date have missing year and month, so they collapse to one row per area
* here - confirm that this is intended.
duplicates drop area_name year month, force

** Some are bimonthly, some are monthly, set base as 2006
* For every item series this section creates, per area:
*   <item>_base   the January 2006 value
*   <item>_base2  the February 2006 value (fallback when January is missing)
*   <item>_i      the series divided by its base (January, else February)
*   <item>_logi   the natural log of <item>_i
local cpi_items valueSAF valueSAF1 valueSAF11 valueSAH valueSAN valueSANL1 valueSANL11 valueSEFV valueSEHA

* Step 1: pick up the base-period values. Each variable is non-missing only
* on the January (resp. February) 2006 row of an area.
cap drop valueSAF_base-valueSEHA_base2
foreach v of varlist `cpi_items' {
	egen `v'_base = mean(`v') if year==2006 & month==1, by(area_name year month)
	egen `v'_base2 = mean(`v') if year==2006 & month==2, by(area_name year month)
}

* Step 2: copy each base value onto every row of its area. min() ignores
* missings, so it returns the single non-missing base value of the area, or
* missing when the area has no reading in that month.
foreach v of varlist `cpi_items' {
	cap drop temp1 temp2
	bys area_name (year month): egen temp1 = min(`v'_base)
	* Fix: the February base has to be spread from <item>_base2. Taking it
	* from <item>_base made base2 a copy of the January base, so the
	* February fallback in step 3 could never fill anything in.
	bys area_name (year month): egen temp2 = min(`v'_base2)
	replace `v'_base = temp1
	replace `v'_base2 = temp2
	drop temp1 temp2
}

* Step 3: index each series to its 2006 base and take logs. When the January
* based index is missing, fall back to the February base.
foreach v of varlist `cpi_items' {
	gen `v'_i = `v'/`v'_base
	replace `v'_i = `v'/`v'_base2 if `v'_i==.
	gen `v'_logi = log(`v'_i)
}

* Store the wide area-by-period dataset, including the base, index and log
* index variables created above, overwriting any earlier copy.
save "${path_big_dta}/bls/reshape_cpiu_cities_06b", replace

*************************************
** Close workspace
*************************************
* Stop timer 1 and print its accumulated time. This only reports something
* useful if timer 1 was started earlier in the session.
timer off 1
timer list 1
** log close